if __name__ == '__main__':
    import duckdb
    import pandas as pd

    # 加载CSV并注册为DuckDB表
    file_path = 'C:/Users/MarioZzz/Desktop/duckdb/test_cases/cab_ride_data.csv'
    df = pd.read_csv(file_path)
    con = duckdb.connect()
    con.register('cab_rides', df)

    # 执行SQL查询箱线图关键统计值
    query = """
    WITH stats AS (
        SELECT 
            MIN(total_distance_km) AS min_val,
            QUANTILE(total_distance_km, 0.25) AS q1,
            MEDIAN(total_distance_km) AS median,
            QUANTILE(total_distance_km, 0.75) AS q3,
            MAX(total_distance_km) AS max_val
        FROM cab_rides
    ),
    iqr AS (
        SELECT 
            q1,
            q3,
            (q3 - q1) AS iqr
        FROM stats
    ),
    outliers AS (
        SELECT 
            COUNT(*) AS outlier_count
        FROM cab_rides, iqr
        WHERE total_distance_km < (iqr.q1 - 1.5 * iqr.iqr)
           OR total_distance_km > (iqr.q3 + 1.5 * iqr.iqr)
    )
    SELECT 
        s.min_val,
        s.q1,
        s.median,
        s.q3,
        s.max_val,
        o.outlier_count
    FROM stats s, outliers o;
    """

    result = con.execute(query).fetchdf()
    print(result)
